1 1. Load and prepare data

# Load data
excel_path <- "../CRC_ProteoResistance_data/CPMSF_GPPF-PM-43_CPPF-PM-43_results_20240119.xlsx"
quan_df <- read_excel(excel_path, sheet = "Quan_data")

# Standardize first 3 columns
colnames(quan_df)[1:3] <- c("protein_id", "gene_symbol", "description")
colnames(quan_df) <- str_replace_all(colnames(quan_df), "TH9616", "TH9619")

# Clean intensities
for (i in 4:ncol(quan_df)) {
  quan_df[[i]] <- suppressWarnings(as.numeric(quan_df[[i]]))
  quan_df[[i]][is.na(quan_df[[i]]) | quan_df[[i]] <= 0] <- 1e-3
}

# Matrix & metadata
assay_data <- as.matrix(quan_df[, 4:ncol(quan_df)])
rownames(assay_data) <- make.unique(quan_df$gene_symbol)
row_metadata <- quan_df[, 1:3]
rownames(row_metadata) <- rownames(assay_data)

# Sample metadata
sample_names <- colnames(assay_data)
cell_line <- case_when(
  str_detect(sample_names, "HCT116") ~ "HCT116",
  str_detect(sample_names, "SW620") ~ "SW620",
  TRUE ~ NA_character_
)
treatment <- case_when(
  str_detect(sample_names, "parental") ~ "Parental",
  str_detect(sample_names, "TH9619") ~ "TH9619_R",
  str_detect(sample_names, "MTX") ~ "MTX_R",
  TRUE ~ "Unknown"
)
group <- paste(cell_line, treatment, sep = ".")

col_metadata <- DataFrame(
  sample = sample_names,
  cell_line = cell_line,
  treatment = treatment,
  group = group,
  row.names = sample_names
)

# SE object
se <- SummarizedExperiment(
  assays = list(norm = assay_data),
  rowData = row_metadata,
  colData = col_metadata
)

2 2. PCA plot

mat <- assay(se)
mat <- mat[apply(mat, 1, function(x) sd(x, na.rm = TRUE) > 0), ]
pca <- prcomp(t(mat), scale. = TRUE)
cd <- as.data.frame(colData(se))

fviz_pca_ind(pca,
             geom.ind = "point",
             col.ind = cd$group,
             palette = "jco",
             title = "PCA of Normalized Proteins")

3 3. Volcano plots & DEGs

cell_lines <- unique(colData(se)$cell_line)
de_results <- list()
deg_filtered <- list()

for (cl in cell_lines) {
  se_cl <- se[, colData(se)$cell_line == cl]
  treatments <- unique(se_cl$treatment)
  treatments <- treatments[treatments != "Parental"]

  for (tr in treatments) {
    comp_name <- paste0(cl, "_", tr, "_vs_Parental")
    selected <- se_cl[, se_cl$treatment %in% c("Parental", tr)]
    selected$treatment <- droplevels(factor(selected$treatment))

    design <- model.matrix(~ 0 + selected$treatment)
    colnames(design) <- levels(selected$treatment)
    fit <- lmFit(assay(selected), design)
    contrast_matrix <- makeContrasts(paste0(tr, " - Parental"), levels = design)
    fit2 <- contrasts.fit(fit, contrast_matrix)
    fit2 <- eBayes(fit2)

    top_res <- topTable(fit2, coef = 1, number = Inf) %>%
      rownames_to_column("gene_symbol") %>%
      mutate(comparison = comp_name)

    degs <- top_res %>% filter(adj.P.Val < 0.05, abs(logFC) > 1)

    de_results[[comp_name]] <- top_res
    deg_filtered[[comp_name]] <- degs
  }
}

4 4. Volcano plots

for (comp in names(deg_filtered)) {
  df <- de_results[[comp]] %>%
    mutate(Regulation = case_when(
      adj.P.Val < 0.005 & logFC > 2 ~ "Up",
      adj.P.Val < 0.005 & logFC < -2 ~ "Down",
      TRUE ~ "NotSig"
    ))

top10 <- df %>%
  filter(adj.P.Val < 0.05, abs(logFC) > 2) %>%
  arrange(adj.P.Val) %>%
  slice(1:10)



  p <- ggplot(df, aes(x = logFC, y = -log10(adj.P.Val))) +
    geom_point(aes(color = Regulation), alpha = 0.6) +
    geom_point(data = top10, shape = 21, fill = NA, color = "black", size = 2.5, stroke = 0.8) +
    geom_text_repel(data = top10, aes(label = gene_symbol), color = "black", size = 3) +
    geom_vline(xintercept = c(-2, 2), linetype = "dashed", color = "blue") +
    geom_hline(yintercept = -log10(0.0005), linetype = "dashed", color = "red") +
    scale_color_manual(values = c("Up" = "firebrick", "Down" = "royalblue", "NotSig" = "grey70")) +
    theme_minimal(base_size = 12) +
    labs(title = paste("Volcano Plot –", comp),
         x = "log2 Fold Change",
         y = "-log10 Adjusted P-Value")

  print(p)
}

5 5. DEG table

for (comp in names(deg_filtered)) {
  dat <- deg_filtered[[comp]] %>%
    arrange(adj.P.Val) %>%
    dplyr::select(gene_symbol, logFC, adj.P.Val, AveExpr)

  DT::datatable(dat, 
                caption = htmltools::tags$caption(
                  style = 'caption-side: top; text-align: left;',
                  paste("Top DEGs for", comp)),
                options = list(pageLength = 10, scrollX = TRUE))
}

6 6. Functional Enrichment (KEGG)

library(clusterProfiler)
library(org.Hs.eg.db)

enrich_results <- list()

for (comp in names(deg_filtered)) {
  genes <- deg_filtered[[comp]]$gene_symbol

  # Map gene symbols to ENTREZ IDs
  entrez_ids <- tryCatch({
    bitr(genes,
         fromType = "SYMBOL",
         toType = "ENTREZID",
         OrgDb = org.Hs.eg.db)
  }, error = function(e) {
    message("❌ Failed to convert symbols for ", comp, ": ", e$message)
    return(NULL)
  })

  if (!is.null(entrez_ids) && nrow(entrez_ids) > 5) {
    # Run KEGG enrichment
    kegg_res <- tryCatch({
      enrichKEGG(gene = entrez_ids$ENTREZID, organism = "hsa")
    }, error = function(e) {
      message("❌ KEGG enrichment failed for ", comp, ": ", e$message)
      return(NULL)
    })

    # Check if valid and non-empty
    if (!is.null(kegg_res) &&
        inherits(kegg_res, "enrichResult") &&
        nrow(kegg_res@result) > 0 &&
        "Description" %in% colnames(kegg_res@result) &&
        any(!is.na(kegg_res@result$Description))) {

      enrich_results[[comp]] <- kegg_res

      tryCatch({
        print(barplot(kegg_res,
                      showCategory = 10,
                      title = paste("KEGG Pathways -", comp)))
      }, error = function(e) {
        message("⚠️ Plotting failed for ", comp, ": ", e$message)
      })

    } else {
      message("⚠️ No enriched KEGG terms found for ", comp)
    }
  } else {
    message("⚠️ Not enough valid genes mapped for ", comp)
  }
}

library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)

go_results <- list()

for (comp in names(deg_filtered)) {
  genes <- deg_filtered[[comp]]$gene_symbol

  entrez_ids <- tryCatch({
    bitr(genes,
         fromType = "SYMBOL",
         toType = "ENTREZID",
         OrgDb = org.Hs.eg.db)
  }, error = function(e) {
    message("❌ Failed to convert symbols for ", comp, ": ", e$message)
    return(NULL)
  })

  if (!is.null(entrez_ids) && nrow(entrez_ids) > 5) {

    # Biological Process (BP)
    go_bp <- tryCatch({
      enrichGO(gene = entrez_ids$ENTREZID,
               OrgDb = org.Hs.eg.db,
               keyType = "ENTREZID",
               ont = "BP",
               pAdjustMethod = "BH",
               pvalueCutoff = 0.05,
               readable = TRUE)
    }, error = function(e) NULL)

    # Molecular Function (MF)
    go_mf <- tryCatch({
      enrichGO(gene = entrez_ids$ENTREZID,
               OrgDb = org.Hs.eg.db,
               keyType = "ENTREZID",
               ont = "MF",
               pAdjustMethod = "BH",
               pvalueCutoff = 0.05,
               readable = TRUE)
    }, error = function(e) NULL)

    # Cellular Component (CC)
    go_cc <- tryCatch({
      enrichGO(gene = entrez_ids$ENTREZID,
               OrgDb = org.Hs.eg.db,
               keyType = "ENTREZID",
               ont = "CC",
               pAdjustMethod = "BH",
               pvalueCutoff = 0.05,
               readable = TRUE)
    }, error = function(e) NULL)

    # Store in list
    go_results[[comp]] <- list(
      BP = go_bp,
      MF = go_mf,
      CC = go_cc
    )

    # Plot if not empty
    if (!is.null(go_bp) && nrow(go_bp) > 0) {
      print(barplot(go_bp, showCategory = 10,
                    title = paste("GO: Biological Process -", comp)))
    } else {
      message("⚠️ No GO:BP terms enriched for ", comp)
    }

    if (!is.null(go_mf) && nrow(go_mf) > 0) {
      print(barplot(go_mf, showCategory = 10,
                    title = paste("GO: Molecular Function -", comp)))
    } else {
      message("⚠️ No GO:MF terms enriched for ", comp)
    }

    if (!is.null(go_cc) && nrow(go_cc) > 0) {
      print(barplot(go_cc, showCategory = 10,
                    title = paste("GO: Cellular Component -", comp)))
    } else {
      message("⚠️ No GO:CC terms enriched for ", comp)
    }

  } else {
    message("⚠️ Not enough mappable genes for ", comp)
  }
}

# Create multi-panel GO enrichment plots (barplots) for each comparison
for (comp in names(go_results)) {
  plots <- list()

  for (ont in c("BP", "MF", "CC")) {
    res <- go_results[[comp]][[ont]]

    if (!is.null(res) && nrow(res) > 0) {
      p <- barplot(res, showCategory = 10) +
        ggtitle(paste(ont, "-", comp)) +
        theme_minimal(base_size = 12)
      plots[[ont]] <- p
    }
  }

  # Combine all available plots (BP, MF, CC) for the current comparison
  if (length(plots) > 0) {
    combined_plot <- wrap_plots(plots, ncol = 1)
    print(combined_plot)
  } else {
    cat("### ⚠️ No GO terms enriched for", comp, "\n\n")
  }
}